{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# GPU: 32*40 in 18.1s = 71/s\n",
    "# CPU: 32*8 in 44s = 6/s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OS:  linux\n",
      "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
      "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
      "Numpy:  1.13.3\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import sys\n",
    "import numpy as np\n",
    "import caffe2\n",
    "from caffe2.proto import caffe2_pb2\n",
    "from caffe2.python import model_helper, core, workspace, models\n",
    "print(\"OS: \", sys.platform)\n",
    "print(\"Python: \", sys.version)\n",
    "print(\"Numpy: \", np.__version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6\r\n"
     ]
    }
   ],
   "source": [
    "!cat /proc/cpuinfo | grep processor | wc -l"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "name\r\n",
      "Tesla K80\r\n"
     ]
    }
   ],
   "source": [
    "!nvidia-smi --query-gpu=gpu_name --format=csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "BATCH_SIZE = 32\n",
    "RESNET_FEATURES = 2048\n",
    "BATCHES_GPU = 40\n",
    "BATCHES_CPU = 8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def give_fake_data(batches):\n",
    "    \"\"\" Create an array of fake data to run inference on\"\"\"\n",
    "    np.random.seed(0)\n",
    "    dta = np.random.rand(BATCH_SIZE*batches, 224, 224, 3).astype(np.float32)\n",
    "    return dta, np.swapaxes(dta, 1, 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def yield_mb(X, batchsize):\n",
    "    \"\"\" Function yield (complete) mini_batches of data\"\"\"\n",
    "    for i in range(len(X)//batchsize):\n",
    "        yield i, X[i*batchsize:(i+1)*batchsize]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1280, 224, 224, 3) (1280, 3, 224, 224)\n"
     ]
    }
   ],
   "source": [
    "# Create batches of fake data\n",
    "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCHES_GPU)\n",
    "print(fake_input_data_cl.shape, fake_input_data_cf.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "#%%bash\n",
    "#wget https://github.com/leonardvandriel/caffe2_models/raw/master/model/resnet50_init_net.pb\n",
    "#wget https://github.com/leonardvandriel/caffe2_models/raw/master/model/resnet50_predict_net.pb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_net(INIT_NET, PREDICT_NET, device_opts):\n",
    "    init_def = caffe2_pb2.NetDef()\n",
    "    with open(INIT_NET, 'rb') as f:\n",
    "        init_def.ParseFromString(f.read())\n",
    "        init_def.device_option.CopyFrom(device_opts)\n",
    "        workspace.RunNetOnce(init_def.SerializeToString())\n",
    "    net_def = caffe2_pb2.NetDef()\n",
    "    with open(PREDICT_NET, 'rb') as f:\n",
    "        net_def.ParseFromString(f.read())\n",
    "        net_def.device_option.CopyFrom(device_opts)\n",
    "        workspace.CreateNet(net_def.SerializeToString(), overwrite=True)\n",
    "    return net_def.name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict_fn(classifier, data, batchsize):\n",
    "    \"\"\" Return features from classifier \"\"\"\n",
    "    out = np.zeros((len(data), RESNET_FEATURES), np.float32)\n",
    "    for idx, dta in yield_mb(data, batchsize):\n",
    "        workspace.FeedBlob(\"data\", dta, device_option=device_opts)\n",
    "        workspace.RunNet(classifier, 1)\n",
    "        out[idx*batchsize:(idx+1)*batchsize] = workspace.FetchBlob('pool5').squeeze()\n",
    "    return out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "init_net_loc = 'resnet50_init_net.pb'\n",
    "predict_net_loc = 'resnet50_predict_net.pb'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. GPU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "device_opts = core.DeviceOption(caffe2_pb2.CUDA, 0) \n",
    "test_net = load_net(init_net_loc, \n",
    "                    predict_net_loc,\n",
    "                    device_opts=device_opts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "cold_start = predict_fn(test_net, fake_input_data_cf, BATCH_SIZE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 15.2 s, sys: 2.89 s, total: 18.1 s\n",
      "Wall time: 18.1 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# GPU: 18.1s\n",
    "features = predict_fn(test_net, fake_input_data_cf, BATCH_SIZE)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. CPU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Kill all GPUs ...\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = '-1'\n",
    "assert workspace.ResetWorkspace()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "device_opts = core.DeviceOption(caffe2_pb2.CPU) \n",
    "test_net = load_net(init_net_loc, \n",
    "                    predict_net_loc,\n",
    "                    device_opts=device_opts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(256, 224, 224, 3) (256, 3, 224, 224)\n"
     ]
    }
   ],
   "source": [
    "# Create batches of fake data\n",
    "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCHES_CPU)\n",
    "print(fake_input_data_cl.shape, fake_input_data_cf.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "cold_start = predict_fn(test_net, fake_input_data_cf, BATCH_SIZE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 4min, sys: 2.39 s, total: 4min 2s\n",
      "Wall time: 44 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# CPU: 44s\n",
    "features = predict_fn(test_net, fake_input_data_cf, BATCH_SIZE)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:py35]",
   "language": "python",
   "name": "conda-env-py35-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
